# clear environment
rm(list=ls())

# load packages
library(ggplot2)
library(openxlsx)
library(mice)
library(readstata13)

# load data set with first chapter content
dat = openxlsx::read.xlsx("vsberichte_metadata.xlsx", sheet = 1)

# binary varibable indicating whether interior minister is from a center-right party
dat$cr_intmin <- ifelse(dat$intmin_party == "cdu" | dat$intmin_party== "csu" | dat$intmin_party=="fdp", 1, ifelse(dat$intmin_party=="pro", NA, 0))

# create chapter length variable
dat$rwe_chapter_length <- dat$page_right_end-dat$page_right_start
dat$lwe_chapter_length <- dat$page_left_end-dat$page_left_start

# ratio of chapter length
dat$rwe_lwe_chapter_ratio <- dat$rwe_chapter_length/dat$lwe_chapter_length
dat$rwe_lwe_chapter_ratio_log <- log(dat$rwe_chapter_length + 0.5) - log(dat$lwe_chapter_length + 0.5)

# center-right interior minister
dat$cr_intmin <- ifelse(dat$intmin_party == "cdu" | dat$intmin_party== "csu" | dat$intmin_party=="fdp", 1,
                        ifelse(dat$intmin_party=="pro", NA, 0))

# east indicator
east <- c("brandenburg", "mecklenburg-vorpommern", "sachsen", "sachsen-anhalt", "thüringen")
dat$east <- ifelse(dat$jurisdiction %in% east, 1, ifelse(dat$jurisdiction=="bund", NA, 0))

# create decade indicator
dat$decade <- NA
dat$decade[dat$year<1960]<-1950
dat$decade[dat$year>=1960 & dat$year<1970]<-1960
dat$decade[dat$year>=1970 & dat$year<1980]<-1970
dat$decade[dat$year>=1980 & dat$year<1990]<-1980
dat$decade[dat$year>=1990 & dat$year<2000]<-1990
dat$decade[dat$year>=2000 & dat$year<2010]<-2000
dat$decade[dat$year>=2010 & dat$year<2020]<-2010
dat$decade[dat$year>=2020 & dat$year<2030]<-2020

# ratio in crimes
dat$rwe_lwe_crime_ratio <- dat$extreme_crime_right / dat$extreme_crime_left

# create variables indicating if chapter on RWE/LWE appears first
dat$rwe_first <- ifelse(apply(dat[,c("page_right_start", "page_left_start", "page_islamist_start1", "page_islamist_start2")], 1, FUN=min, na.rm=T)==dat$page_right_start, 1, 0)
dat$lwe_first <- ifelse(apply(dat[,c("page_right_start", "page_left_start", "page_islamist_start1", "page_islamist_start2")], 1, FUN=min, na.rm=T)==dat$page_left_start, 1, 0)
dat$extremism_first <- ifelse(dat$rwe_first==1, "RWE", 
                              ifelse(dat$lwe_first==1, "LWE", 
                                     ifelse(dat$rwe_first!=1 & dat$lwe_first !=1 & !is.na(dat$rwe_lwe_chapter_ratio), "Other", NA)))

# one report from Northrine Westphalia in 1963 did not include a chapter on LWE, but one on RWE (which was first) so we need to manually recode the "lwe_first variable" there to 0
dat$lwe_first[dat$year==1963 & dat$jurisdiction=="nordrhein-westfalen"] = 0

# merge in polling data
polbar <- read.dta13("polbar_agg.dta")
polbar$jurisdiction <- tolower(polbar$jurisdiction)
dat$jurisdiction <- gsub("ü", "u", dat$jurisdiction)
dat <- merge(dat, polbar, by.x=c("jurisdiction", "year_pub"), by.y=c("jurisdiction", "year"), all.x=T)

#multiple imputation
dat_mice <- dat[!is.na(dat$rwe_first) & !is.na(dat$lwe_first) & !is.na(dat$cr_intmin),
                c("rwe_lwe_chapter_ratio", "rwe_lwe_chapter_ratio_log", "rwe_lwe_crime_ratio", "cr_intmin", "jurisdiction", "year",
                  "pmk_right", "pmk_left", "extreme_crime_violent_right", "extreme_crime_violent_left",
                  "person_right", "person_left", "vshead_party", "extreme_crime_islamist", "person_violent_islamist",
                  "rwe_first", "lwe_first", "extreme_crime_left", "extreme_crime_right", "polbar_fr_vote_pct", "elec_fr_vote_pct", "east")]
dat_imputed <- mice(dat_mice, m=20, maxit = 50, method = 'pmm', seed = 500)

# regression models
mod_1 <- glm(rwe_first ~ factor(cr_intmin), data=dat, family = binomial(link="logit"))
mod_2 <- glm(rwe_first ~ factor(cr_intmin) + factor(jurisdiction), data=dat, family = binomial(link="logit"))
mod_3 <- glm(rwe_first ~ factor(cr_intmin) + factor(jurisdiction) + factor(decade), data=dat, family = binomial(link="logit"))
mod_4 <- glm(rwe_first ~ factor(cr_intmin) + factor(jurisdiction) + factor(year), data=dat, family = binomial(link="logit"))
mod_5 <- pool(with(data=dat_imputed, exp=glm(rwe_first~factor(cr_intmin) + extreme_crime_right + factor(jurisdiction) + factor(year), family = binomial(link="logit"))))

mod_6 <- glm(lwe_first ~ factor(cr_intmin), data=dat, family = binomial(link="logit"))
mod_7 <- glm(lwe_first ~ factor(cr_intmin) + factor(jurisdiction), data=dat, family = binomial(link="logit"))
mod_8 <- glm(lwe_first ~ factor(cr_intmin) + factor(jurisdiction) + factor(decade), data=dat, family = binomial(link="logit"))
mod_9 <- glm(lwe_first ~ factor(cr_intmin) + factor(jurisdiction) + factor(year), data=dat, family = binomial(link="logit"))
mod_10 <- pool(with(data=dat_imputed, exp=glm(lwe_first~factor(cr_intmin) + extreme_crime_left + factor(jurisdiction) + factor(year), family = binomial(link="logit"))))

